# =============================================================================
# APPENDIX J FIGURE 13A: BOIX & SVOLIK ROBUSTNESS WITH BOOTSTRAP
# Bootstrap uncertainty analysis for Weibull survival models
# =============================================================================

# --- PACKAGES ----------------------------------------------------------------
library(dplyr)
library(survival)
library(flexsurv)
library(survminer)
library(tidyverse)
library(matrixStats)
library(haven)
library(xtable)
library(texreg)

# --- CONFIGURATION -----------------------------------------------------------
set.seed(123)  # For reproducibility
n_bootstrap <- 1000

# --- DATA LOADING ------------------------------------------------------------
data <- read_dta("leader_tvc_2.dta", encoding = "latin1") %>%
  mutate(COWcode = ccode)
latent <- read.csv("estimates_independent.csv")

# --- DATA PREPARATION --------------------------------------------------------
# Deduplicate latent estimates
latent_unique <- latent %>%
  distinct(COWcode, year, .keep_all = TRUE)

# Build master leader-year dataframe
df_master <- data %>%
  inner_join(latent_unique, by = c("COWcode", "year")) %>%
  arrange(COWcode, year, leadid) %>%
  group_by(COWcode) %>%
  mutate(
    dyn.estimates_lag = lag(dyn.estimates, 1),
    dyn.up_lag        = lag(dyn.up, 1),
    dyn.lo_lag        = lag(dyn.lo, 1)
  ) %>%
  ungroup() %>%
  group_by(leadid) %>%
  mutate(time0 = lag(time, default = 0)) %>%
  ungroup()

# Define covariates used in all models
all_covs <- c(
  "time0", "time", "c_coup", "c_natural", "c_revolt",
  "legislature", "leg_growth_2", "dyn.estimates_lag",
  "gdp_1k", "chgdpen_fearonlaitin", "Oil_fearonlaitin",
  "postcoldwarlag", "civiliandictatorshiplag", "militarydictatorshiplag",
  "communist", "lpopl1_fearonlaitin", "ethfrac_fearonlaitin",
  "relfrac_fearonlaitin", "age"
)

# Complete case analysis
df_cc <- df_master %>%
  filter(complete.cases(select(., all_of(all_covs))))

# --- ANALYSIS ----------------------------------------------------------------
# Fit baseline Weibull models
fit_coups_restr <- flexsurvreg(
  Surv(time0, time, c_coup) ~ legislature + leg_growth_2 + 
    gdp_1k + chgdpen_fearonlaitin + Oil_fearonlaitin +
    postcoldwarlag + civiliandictatorshiplag +
    militarydictatorshiplag + communist +
    lpopl1_fearonlaitin + ethfrac_fearonlaitin +
    relfrac_fearonlaitin + age,
  data = df_cc, dist = "weibull"
)

fit_coups_new <- flexsurvreg(
  Surv(time0, time, c_coup) ~ dyn.estimates_lag + 
    gdp_1k + chgdpen_fearonlaitin + Oil_fearonlaitin +
    postcoldwarlag + civiliandictatorshiplag +
    militarydictatorshiplag + communist +
    lpopl1_fearonlaitin + ethfrac_fearonlaitin +
    relfrac_fearonlaitin + age,
  data = df_cc, dist = "weibull"
)

fit_natural_restr <- flexsurvreg(
  Surv(time0, time, c_natural) ~ legislature + leg_growth_2 + 
    gdp_1k + chgdpen_fearonlaitin + Oil_fearonlaitin +
    postcoldwarlag + civiliandictatorshiplag +
    militarydictatorshiplag + communist +
    lpopl1_fearonlaitin + ethfrac_fearonlaitin +
    relfrac_fearonlaitin + age,
  data = df_cc, dist = "weibull"
)

fit_natural_new <- flexsurvreg(
  Surv(time0, time, c_natural) ~ dyn.estimates_lag + 
    gdp_1k + chgdpen_fearonlaitin + Oil_fearonlaitin +
    postcoldwarlag + civiliandictatorshiplag +
    militarydictatorshiplag + communist +
    lpopl1_fearonlaitin + ethfrac_fearonlaitin +
    relfrac_fearonlaitin + age,
  data = df_cc, dist = "weibull"
)

fit_revolt_restr <- flexsurvreg(
  Surv(time0, time, c_revolt) ~ legislature + leg_growth_2 + 
    gdp_1k + chgdpen_fearonlaitin + Oil_fearonlaitin +
    postcoldwarlag + civiliandictatorshiplag +
    militarydictatorshiplag + communist +
    lpopl1_fearonlaitin + ethfrac_fearonlaitin +
    relfrac_fearonlaitin + age,
  data = df_cc, dist = "weibull"
)

fit_revolt_new <- flexsurvreg(
  Surv(time0, time, c_revolt) ~ dyn.estimates_lag + 
    gdp_1k + chgdpen_fearonlaitin + Oil_fearonlaitin +
    postcoldwarlag + civiliandictatorshiplag +
    militarydictatorshiplag + communist +
    lpopl1_fearonlaitin + ethfrac_fearonlaitin +
    relfrac_fearonlaitin + age,
  data = df_cc, dist = "weibull"
)

# Display baseline results
texreg(list(
  fit_natural_restr, fit_natural_new, 
  fit_coups_restr, fit_coups_new, 
  fit_revolt_restr, fit_revolt_new
))

# Define survival formulas for jackknife analysis
surv_formula_coup_new <- Surv(time0, time, c_coup) ~ dyn.estimates_lag +
  gdp_1k + chgdpen_fearonlaitin + Oil_fearonlaitin +
  postcoldwarlag + civiliandictatorshiplag +
  militarydictatorshiplag + communist +
  lpopl1_fearonlaitin + ethfrac_fearonlaitin +
  relfrac_fearonlaitin + age

surv_formula_natural_new <- Surv(time0, time, c_natural) ~ dyn.estimates_lag +
  gdp_1k + chgdpen_fearonlaitin + Oil_fearonlaitin +
  postcoldwarlag + civiliandictatorshiplag +
  militarydictatorshiplag + communist +
  lpopl1_fearonlaitin + ethfrac_fearonlaitin +
  relfrac_fearonlaitin + age

surv_formula_revolt_new <- Surv(time0, time, c_revolt) ~ dyn.estimates_lag +
  gdp_1k + chgdpen_fearonlaitin + Oil_fearonlaitin +
  postcoldwarlag + civiliandictatorshiplag +
  militarydictatorshiplag + communist +
  lpopl1_fearonlaitin + ethfrac_fearonlaitin +
  relfrac_fearonlaitin + age


# --- BOOTSTRAP ANALYSIS------------------------------------------
# Bootstrap function with uncertainty intervals
bootstrap_survival <- function(data, n_bootstrap = 1000) {
  boot_results <- list(
    fit_natural = numeric(n_bootstrap),
    fit_coups   = numeric(n_bootstrap),
    fit_revolt  = numeric(n_bootstrap)
  )
  
  rhs <- paste(
    "dyn.estimates_lag_boot", 
    "gdp_1k", "chgdpen_fearonlaitin", "Oil_fearonlaitin",
    "postcoldwarlag", "civiliandictatorshiplag", "militarydictatorshiplag",
    "communist", "lpopl1_fearonlaitin", "ethfrac_fearonlaitin", 
    "relfrac_fearonlaitin", "age", 
    sep = " + "
  )
  
  for (i in 1:n_bootstrap) {
    data$dyn.estimates_lag_boot <- rnorm(
      n = nrow(data),
      mean = data$dyn.estimates_lag,
      sd = (data$dyn.up_lag - data$dyn.lo_lag) / 2
    )
    
    formula_natural <- as.formula(paste("Surv(time0, time, c_natural) ~", rhs))
    formula_coups   <- as.formula(paste("Surv(time0, time, c_coup) ~", rhs))
    formula_revolt  <- as.formula(paste("Surv(time0, time, c_revolt) ~", rhs))
    
    model_nat <- try(flexsurvreg(formula_natural, data = data, dist = "weibull"), silent = TRUE)
    model_coup <- try(flexsurvreg(formula_coups, data = data, dist = "weibull"), silent = TRUE)
    model_rev <- try(flexsurvreg(formula_revolt, data = data, dist = "weibull"), silent = TRUE)
    
    if (!inherits(model_nat, "try-error")) {
      boot_results$fit_natural[i] <- coef(model_nat)["dyn.estimates_lag_boot"]
    }
    if (!inherits(model_coup, "try-error")) {
      boot_results$fit_coups[i] <- coef(model_coup)["dyn.estimates_lag_boot"]
    }
    if (!inherits(model_rev, "try-error")) {
      boot_results$fit_revolt[i] <- coef(model_rev)["dyn.estimates_lag_boot"]
    }
  }
  
  return(boot_results)
}

# Run bootstrap
boot_results <- bootstrap_survival(df_cc, n_bootstrap = 1000)

# Plot bootstrap distributions
par(mfrow = c(1, 3))
hist(boot_results$fit_natural, breaks = 30, col = "gray", border = "white",
     main = "A. Natural Causes", xlab = "Coefficient on Latent Measure", na.rm = TRUE)
hist(boot_results$fit_coups, breaks = 30, col = "gray", border = "white",
     main = "B. Coups", xlab = "Coefficient on Latent Measure", na.rm = TRUE)
hist(boot_results$fit_revolt, breaks = 30, col = "gray", border = "white",
     main = "C. Revolts", xlab = "Coefficient on Latent Measure", na.rm = TRUE)
par(mfrow = c(1, 1))